home *** CD-ROM | disk | FTP | other *** search
- Unit BoolPos;
- {$Define Test}
- { Once debugging is complete, remove the above line to turn off debug mode. }
-
- { Version 1.3.5.P.
-
- Requires Borland Turbo Pascal version 6.0 or later to compile.
-
- Author: Bruce J. Lackore. Created Friday, July 23, 1993.
- Copyright (c) 1993 Bruce J. Lackore. ALL RIGHTS RESERVED.
- }
-
- {$IFDEF Test}
- {$A+,B-,D+,F-,G-,I+,L+,O-,R+,S+,V-,X+}
- {$ELSE}
- {$A+,B-,D-,F-,G-,I-,L-,O-,R-,S-,V-,X+}
- {$ENDIF}
-
- { This unit comprises a function capable of searching a string for multiple
- occurences of substrings using Boolean operators. In the search string,
- Boolean operators And and Or are defined as follows:
-
- & - And
- | - Or
-
- Parentheses are supported for doing multiple searches. Search strings are
- submitted as follows:
-
- i.e. In the source string "The quick brown fox jumped over the lazy dog"
- and the search is for the word blue and the words quick or fox,
- the search string is entered as follows:
-
- (blue&(quick|fox))
-
- The way the function is currently written, And (&) and Or (|) have the same
- precedence level hence the above search string without parentheses would be
- interpretted to be (blue&quick|fox):
-
- blue And quick would be searched for first, the result Or'd with the
- results of the search for fox.
-
- Notice the difference in that (blue&(quick|fox)) is a False statement whilst
- (blue&quick|fox) is True.
-
- The function will automatically scan for () pairs, adding the necessary )
- at the end of the search string or ( at the beginning if required.
-
- The function will also search for (|, |), (& and &) symbols, these being
- illegal.
-
- It should also be noted that although excess parens will not cause the
- function to fail, they DO cause the function to loop unnecessarily through
- the token search (once for each set of parens) while bringing the final
- answer out of the final set of parens.
-
- }
-
- { Bug fixes:
-
- 07/04/1994: Thought the 06/01 fix did the job. It didn't. This time,
- I went back into the token processor and found that it was
- missing a left paren when the tokenized search string was in
- the form of (b@b...)@(b@b...) where b is a boolean designator
- (T or F) and @ is a boolean operator (| or &). Thanx to
- Michael Jarmulowicz for pointing this out.
- The fix was to go into the Process_token_str function and
- ensure that a multi-pass required token string has sufficient
- parens so as to not confuse the token processor.
- Also defined BPos return value should the Fixup_srch_str
- function fail. The default is False (as set in the first
- line of the BPos function itself) and is triggered by
- Fixup_srch_str returning a null string. Removed the "fix"
- that was suggested in the 06/01 bug fix and replaced it with
- code that scans the first and last letters of the Srch_str
- to ensure that they are parens, if not, add a pair.
-
- 06/01/1994: After returning from WestPac, I received a couple of emails
- telling me that if the function was called with NO
- parentheses, it would fail. The fix is simply to add a set of
- parens in the Fixup_srch_str function just before the
- function returns if the first character of the Srch_str is NOT
- a left paren equivalent. I have had one report of the unit
- not working in protected mode. As I don't yet know much about
- protected mode programming, I am still working on that
- particular bug but I WILL fix it if the error is in here. I
- also tightened up one of the assembly replacement functions,
- see the docs for the change.
-
- 10/04/1993: Noticed that length of Src_str in function Next_CPos was
- incorrectly calculated because of positioning of INC DI.
- INC DI precedes the MOV CL,[ES:DI] causing the function to
- consider the first character of Src_str to represent the
- length rather than the actual length byte. Fix is to move
- the INC DI to the line following the MOV CL,[ES:DI].
-
- }
-
- Interface
-
- Function BPos(Srch_str, Src_str: String; Ignore_case: Boolean): Boolean;
-
- { This function accepts a source string and a search string as described above
- and returns a Boolean value based on whether or not the parsed search
- string was found.
- }
-
- { ************************************************************************** }
-
- Implementation
-
- Const
- Lt_pn: Char = '(';
- Rt_pn: Char = ')';
-
- Function Cnt_ch(Scan_char: Char; In_str: String): Byte; Assembler;
-
- { This function will scan a string for occurences of a particular character.
- The function will return the number of occurences.
- }
-
- Asm { Function Cnt_ch }
- XOR AX,AX { 0 AX }
- MOV BL,Scan_char { Put char to count in BL }
- LES SI,In_str { Set ES:SI to point to start of string }
- XOR CX,CX { 0 CX }
- MOV CL,[ES:SI] { Move string length to CX }
- ADD SI,CX { Set ES:SI to point to END of string }
- @LOOK: CMP BL,[ES:SI] { Start Loop, compare current char and BL }
- JNE @NEXT { If not equal, jump to end of loop }
- INC AX { If equal, Inc char cnt (AX) }
- @NEXT: DEC SI { Set ES:SI back one character }
- LOOP @LOOK { Decrement CX and jump to start of loop }
- End; { Function Cnt_ch }
-
- Function Fill_str(Dupe_ch: Char; How_many: Byte): String; Assembler;
-
- { This function returns How_many of Dupe_char.
- }
-
- Asm { Function Fill_str }
- LES DI, @Result { Set ES:DI to function result area }
- CLD { Clear direction flag }
- XOR CH,CH { 0 CH }
- MOV CL,How_many { Length in CX }
- MOV AX,CX { and in AX }
- STOSB { Store length byte }
- MOV AL,Dupe_ch { Put char to dupe in AL }
- REP STOSB { Fill string with char }
- End; { Function Fill_str }
-
- Function PosC(Srch_ch: Char; Src_str: String): Boolean; Assembler;
-
- { This function is similar to the Pos function of Pascal except that it
- accepts only a single character to search for. This function returns a
- True if a Srch_ch is encountered, a False if not.
- }
-
- Asm { Function PosC }
- XOR BX,BX { 0 BX }
- MOV AL,Srch_ch { Put char to look for in AL }
- LES DI,Src_str { Set ES:DI to start of Src_str }
- XOR CX,CX { 0 CX }
- MOV CL,[ES:DI] { Store length of Src_str in CL }
- ADD DI,CX { Set ES:DI to end of string }
- STD { Set direction flag }
- @LOOK: REPNZ SCASB { Look for AL in Src_str }
- JNZ @DONE { If not found, jump to end (BX = 0) }
- INC BX { If Found, Inc Bx to 1 = Pascal True }
- @DONE: MOV AX,BX { Move BX to AX (return result) }
- End; { Function PosC }
-
- Function Last_Cpos(Srch_ch: Char; Src_str: String): Byte; Assembler;
-
- { This function performs the same function as the Pascal POS function except
- that it works only with a single character and rather than returning the
- first position the character is found in, it returns the LAST position that
- the search character is found in.
- }
-
- Asm { Function Last_Cpos }
- MOV AL,Srch_ch { Put char to look for in AL }
- LES DI,Src_str { Set ES:DI to start of Src_str }
- XOR CX,CX { 0 CX }
- MOV CL,[ES:DI] { Move length of Src_str to CL }
- ADD DI,CX { Set ES:DI to end of Src_str }
- INC CX { Add one to CX (correct for string length }
- STD { Set direction flag }
- REPNZ SCASB { Look for character in string }
- MOV AX,CX { If found CX indicates position, else 0 }
- End; { Function Last_Cpos }
-
- Function Next_CPos
- (Srch_ch: Char; Src_str: String; Strt_at: Byte): Byte; Assembler;
-
- { This function searches for the next occurence of Srch_ch in Src_str AFTER
- position Strt_at. The function returns the offset from the beginning of
- the string, NOT the offset from Strt_at.
- }
-
- Asm { Function Next_CPos }
- XOR AX,AX { 0 AX }
- MOV AL,Strt_at { Move position to start at to AL }
- LES DI,Src_str { Set ES:DI to start of Src_str }
- XOR CX,CX { 0 CX }
- MOV CL,[ES:DI] { Store length of Src_str in CL }
- INC DI { Set ES:DI to first char of Src_str }
- MOV BX,CX { Move CX to BX }
- SUB CX,AX { Set CX to length of string after Strt_at }
- ADD DI,AX { Set ES:DI to char at Strt_at in Src_str }
- MOV AL,Srch_ch { Move Srch_ch to AL }
- CLD { Clear direction flag }
- REPNZ SCASB { Look for character following Strt_at }
- JNZ @NOTFND { If not found, jump to end of procedure }
- SUB BX,CX { Set BX to position char found in }
- JMP @DONE { Jump to end of procedure }
- @NOTFND: XOR BX,BX { Srch_ch not found, set BX to 0 }
- @DONE: MOV AX,BX { Move position found at (BX) to AX }
- End; { Function Next_CPos }
-
- {$F+}
- Function Up_cs(In_str: String): String;
-
- { This function converts In_str to all upper case characters.
- }
-
- Begin { Function Up_cs }
- Inline(
- $1E/ { PUSH DS }
- $C4/$7E/$0A/ { LES DI,[BP+$0A] }
- $C5/$76/$06/ { LDS SI,[BP+$06] }
- $30/$E4/ { XOR AH,AH }
- $AC/ { LODSB }
- $AA/ { STOSB }
- $89/$C1/ { MOV CX,AX }
- $E3/$0F/ { JCXZ DONE }
- $FC/ { CLD }
- $AC/ {DOCHAR: LODSB }
- $3C/$61/ { CMP AL,'a' }
- $72/$06/ { JB NEXTCH }
- $3C/$7A/ { CMP AL,'z' }
- $77/$02/ { JA NEXTCH }
- $24/$DF/ { AND AL,$DF }
- $AA/ {NEXTCH: STOSB }
- $E2/$F2/ { LOOP DOCHAR }
- $1F) {DONE: POP DS }
- End; { Function Up_cs }
- {$F-}
-
- Function Fixup_srch_str(Srch_str: String): String;
-
- { This functions sole purpose in life is to count the number of parantheses
- pairs and correct for a deficient number of either by adding the appropriate
- character either at the beginning or the end of the search string. This
- may not yield the correct result as the searcher intended but is a
- requirement of the algorithm (it searches for paran pairs). Note that the
- function will add one set of parantheses if none are found. This function
- also looks for illegal character pairs (&, &), (| and |), these pairs
- indicate an illegal Boolean search. The function returns the corrected
- Srch_str if all is well, an empty string if not.
- }
-
- Var
- Left_para,
- Right_para,
- How_many: Integer;
-
- Begin { Function Fixup_srch_str }
- If (Srch_str[Length(Srch_str)] <> Rt_pn) Or (Srch_str[1] <> Lt_pn) Then
- Srch_str := Lt_pn + Srch_str + Rt_pn;
- Left_para := Cnt_ch(Lt_pn, Srch_str); { Count the parens }
- Right_para := Cnt_ch(Rt_pn, Srch_str);
- How_many := Abs(Left_para - Right_para); { Get the difference }
- If How_many > 0 Then
- If Right_para < Left_para Then
- Srch_str := Srch_str + Fill_str(Rt_pn, How_many)
- Else
- Srch_str := Fill_str(Lt_pn, How_many) + Srch_str;
- If (Pos(Lt_pn + '&', Srch_str) <> 0) Or { Illegal call? }
- (Pos('&' + Rt_pn, Srch_str) <> 0) Or
- (Pos(Lt_pn + '|', Srch_str) <> 0) Or
- (Pos('|' + Rt_pn, Srch_str) <> 0) Then
- Fixup_srch_str := ''
- Else
- Fixup_srch_str := Srch_str { All is well }
- End; { Function Fixup_srch_str }
-
- Function Parse_srch_str(Srch_str, Src_str: String): String;
-
- { This function simply extracts each string to search for, tests to see if
- it exists in the original string and replaces the extracted substring with
- the appropriate token. It should be noted that each substring is determined
- solely by the characters used for parantheses and operators. Any other
- characters are assumed to be part of the search string.
-
- Each substring is searched for in the original Search_str and its presense
- or absense noted with a T or F respectively.
- }
-
- Var
- Rtn_str,
- Token_str: String;
- End_token: Boolean;
-
- Begin { Function Parse_srch_str }
- Token_str := '';
- Rtn_str := '';
- While Srch_str <> '' Do
- Begin
- If (Srch_str[1] In [Lt_pn, Rt_pn, '&', '|']) Then { Token starts? }
- Begin
- End_token := (Token_str <> ''); { End of token? If not }
- If Not(End_token) Then { then start one. }
- Rtn_str := Rtn_str + Srch_str[1]
- End
- Else
- Begin
- Token_str := Token_str + Srch_str[1]; { Add a char to substring }
- End_token := False
- End;
- If End_token Then { If complete token, look }
- Begin { for it in the source str }
- If Pos(Token_str, Src_str) <> 0 Then
- Rtn_str := Rtn_str + 'T' { If found, return T }
- Else
- Rtn_str := Rtn_str + 'F'; { If not, return F }
- Rtn_str := Rtn_str + Srch_str[1];
- Token_str := ''; { Reset to look for more }
- End_token := False
- End; { If End_token }
- Delete(Srch_str, 1, 1) { Delete the char just
- processed and start again
- }
- End; { While Srch_str <> '' }
- Parse_srch_str := Rtn_str
- End; { Function Parse_srch_str }
-
- Function Process_token_str(Token_str: String): Char;
-
- Var
- One_token: String;
- One_token_len,
- Left_para: Byte;
-
- Function Process_one_token_str(The_token: String): Char;
-
- Var
- Lcv: Byte;
- Curr_answer,
- Do_and: Boolean;
-
- Begin { Function Process_one_token_str }
- Curr_answer := (The_token[1] = 'T'); { Establish current answer
- by checking first token.
- }
- For Lcv := 2 to Length(The_token) Do { Look at the rest of the
- token str.
- }
- Case The_token[Lcv] of { Boolean op is And }
- '&': Do_and := True; { Boolean op is Or }
- '|': Do_and := False;
- 'T': If Do_and Then
- Curr_answer := Curr_answer And True { If And }
- Else
- Curr_answer := True; { If Or }
- 'F': If Do_and Then { If And (Or stays T) }
- Curr_answer := False;
- End; { Case }
- If Curr_answer Then { Final result }
- Process_one_token_str := 'T'
- Else
- Process_one_token_str := 'F'
- End; { Function Process_one_token_str }
-
- Begin { Function Process_token_str }
-
- { Are parens present? If so process as tokenized phrase, if not, final
- result has been received or can be processed in a single pass.
- }
-
- If PosC(Lt_pn, Token_str) Then
- Begin
- While Length(Token_str) > 1 Do
- Begin
-
- { Ensure that the token has enough parens to not confuse the
- token string processor. One need only check for a left paren
- since the Fixup_srch_str function ensures that an equal number
- of paren PAIRS exists.
- }
-
- If Not(PosC(Lt_pn, Token_str)) Then
- Token_str := Lt_pn + Token_str + Rt_pn;
-
- { Find leftmost left paren }
-
- Left_para := Last_Cpos(Lt_pn, Token_str);
-
-
- { Find first right paren after leftmost left paren }
-
- One_token_len :=
- Succ(Next_CPos(Rt_pn, Token_str, Left_para) - Left_para);
-
- { Copy everything between the two }
-
- One_token := Copy(Token_str, Left_para, One_token_len);
-
- { Remove the parens }
-
- Dec(One_token[0]);
- Delete(One_token, 1, 1);
-
- { Remove the original substring from the phrase }
-
- Delete(Token_str, Left_para, One_token_len);
-
- { Insert the resultant single character in place of the old
- substring.
- }
-
- Insert(Process_one_token_str(One_token), Token_str, Left_para)
- End; { While Length(Token_str) > 1 }
- Process_token_str := Token_str[1]
- End
- Else
- Process_token_str := Process_one_token_str(One_token)
- End; { Function Process_token_str }
-
- Function BPos;
-
- Begin { Function BPos }
- BPos := False;
- If Ignore_case Then
- Begin
- Srch_str := Up_cs(Srch_str);
- Src_str := Up_cs(Src_str)
- End; { If Ignore_case }
-
- { Is this a Boolean expression? If so process with this function, else
- process with Pascal POS function.
- }
-
- If PosC('|', Srch_str) Or PosC('&', Srch_str) Then
- Begin
- Srch_str := Parse_srch_str(Fixup_srch_str(Srch_str), Src_str);
- If Srch_str <> '' Then
- BPos := (Process_token_str(Srch_str) = 'T')
- End
- Else
- BPos := Pos(Srch_str, Src_str) <> 0
- End; { Function BPos }
-
- End. { Unit BoolPos }
-
- Program Test;
- {$Define test}
-
- { Version 1.0.0.T
-
- Requires Borland Turbo Pascal version 6.0 or later to compile.
-
- Author: Bruce J. Lackore. Created Monday, June 13, 1994.
- Copyright (c) 1994 Bruce J. Lackore. ALL RIGHTS RESERVED.
- }
-
- {$IFDEF Test}
- {$A+,B-,D+,E+,F-,G-,I+,L+,N-,R+,S+,V-,X+}
- {$ELSE}
- {$A+,B-,D-,E+,F-,G-,I-,L-,N-,R-,S-,V-,X+}
- {$ENDIF}
-
- {$M 16384, 0, 655360}
-
- { This is a quick and really dirty test program for the Boolpos unit. Just
- tinker with the search phrase in line 3 of the code and enjoy!
- }
-
- Uses Boolpos;
-
- Var
- BResult: Boolean;
- Src_str: String;
-
- Procedure Start_program;
-
- Begin { Procedure Start_program }
- BResult := False;
- Src_str := 'Now is the time for all good programmers to switch to OS/2';
- BResult := BPos('(Now&then)|(time&bad)', Src_str, False)
- End; { Procedure Start_program }
-
- Begin { Program: Test }
- Start_program;
- End. { Program: Test }